In [1]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import plotly.express as py

Matplotlib¶

Pie Chart¶

In [2]:
labels = ["Elephants","Tigers","Lions","Giraffe"]
size = [15,30,45,80]

The pie chart shows a percentage distribution of each value

In [3]:
fig,ax = plt.subplots(figsize = (4,4),dpi =100)
explode = (0,0.1,0,0)##0.1 at 2nd place because Tiger is to be exploded
ax.pie(size,labels = labels,autopct = "%2f",shadow = "True",explode = explode)
plt.show()
In [4]:
labels = ["Elephants","Tigers","Lions","Giraffe"]
size = [15,30,45,80]

Histogram¶

Below is a dictionary which is treated as a dataset which is used to generate visualizations

In [5]:
 grocery = {"Milk": 10,
                    "Bread" : 8,
                     "Butter" : 12,
                      "Cheese": 10,
                       "Eggs": 5}
fig,ax = plt.subplots(edgecolor="white", linewidth=0.7)
ax.bar(grocery.keys(),grocery.values())
ax.set_title("Items")
plt.ylabel("Price")
Out[5]:
Text(0, 0.5, 'Price')

Subplots (Matplotlib)¶

In [6]:
fig,((ax1,ax2),(ax3,ax4))= plt.subplots(nrows=2,ncols=2,figsize = (10,6))
x = np.linspace(0,10,100)
ax1.plot(x,x/2)
ax2.scatter(np.random.randn(10),np.random.randn(10))
ax3.bar(grocery.keys(),grocery.values())
ax4.hist(np.random.randn(1000),rwidth=0.9)
plt.show()

Scatter Plot¶

In [7]:
x = np.linspace(0,10,100)
fig,ax  = plt.subplots()
ax.scatter(x,np.exp(x))
Out[7]:
<matplotlib.collections.PathCollection at 0x1d8613848b0>
In [8]:
fig,ax = plt.subplots()
ax.scatter(x,np.sin(x))
Out[8]:
<matplotlib.collections.PathCollection at 0x1d861274dc0>

Seaborn¶

Loading the dataset into the environment¶

In [9]:
tips = sns.load_dataset("tips")
tips
Out[9]:
total_bill tip sex smoker day time size
0 16.99 1.01 Female No Sun Dinner 2
1 10.34 1.66 Male No Sun Dinner 3
2 21.01 3.50 Male No Sun Dinner 3
3 23.68 3.31 Male No Sun Dinner 2
4 24.59 3.61 Female No Sun Dinner 4
... ... ... ... ... ... ... ...
239 29.03 5.92 Male No Sat Dinner 3
240 27.18 2.00 Female Yes Sat Dinner 2
241 22.67 2.00 Male Yes Sat Dinner 2
242 17.82 1.75 Male No Sat Dinner 2
243 18.78 3.00 Female No Thur Dinner 2

244 rows × 7 columns

Pairplot¶

In [10]:
sns.pairplot(tips)
Out[10]:
<seaborn.axisgrid.PairGrid at 0x1d861328550>

Seaborn Relational Plot¶

The Seaborn Relational Plot (relplot) allows us to visualise how variables within a dataset relate to each other

In [11]:
#checking the relation between total bill and tip with respect to gender
sns.relplot(data = tips, x= "total_bill",y = "tip", hue = "sex")
Out[11]:
<seaborn.axisgrid.FacetGrid at 0x1d861245340>

checking the relation between total bill and tip with respect to smoker

In [12]:
sns.relplot(data = tips,x =  "total_bill",y = "tip",hue = "smoker",col = "smoker")
Out[12]:
<seaborn.axisgrid.FacetGrid at 0x1d861b3e820>

Distribution Plot¶

Visualizing the spread of the data (total_bill) with respect to "time" as a categorical feature

In [13]:
sns.displot(data=tips, x="total_bill", col="time", kde=True)
Out[13]:
<seaborn.axisgrid.FacetGrid at 0x1d861c4ca60>

Regression Plot¶

In [14]:
sns.regplot(data = tips, x = "total_bill", y = "tip",color = "red")
Out[14]:
<AxesSubplot: xlabel='total_bill', ylabel='tip'>
In [15]:
sns.get_dataset_names()
Out[15]:
['anagrams',
 'anscombe',
 'attention',
 'brain_networks',
 'car_crashes',
 'diamonds',
 'dots',
 'dowjones',
 'exercise',
 'flights',
 'fmri',
 'geyser',
 'glue',
 'healthexp',
 'iris',
 'mpg',
 'penguins',
 'planets',
 'seaice',
 'taxis',
 'tips',
 'titanic']

Loading the flights data¶

In [16]:
df = sns.load_dataset("flights")
df.head()
Out[16]:
year month passengers
0 1949 Jan 112
1 1949 Feb 118
2 1949 Mar 132
3 1949 Apr 129
4 1949 May 121
In [17]:
df["year"].unique()
Out[17]:
array([1949, 1950, 1951, 1952, 1953, 1954, 1955, 1956, 1957, 1958, 1959,
       1960], dtype=int64)

Boxplot¶

Visualizing the number of passengers travelled with respect to the categorical feature "month"¶

In [18]:
sns.catplot(data = df,x = "month",y = "passengers",ci = True,kind = "box")
Out[18]:
<seaborn.axisgrid.FacetGrid at 0x1d862f0a6d0>

Boxplot¶

Visualizing the five summary statistics of total bill with respect to "days" and "gender"

In [19]:
sns.catplot(data = tips,x = "day",y = "total_bill",kind = "box",hue = "sex")
Out[19]:
<seaborn.axisgrid.FacetGrid at 0x1d862f3f1c0>

Loading the diamonds dataset¶

In [20]:
diamonds = sns.load_dataset("diamonds")
In [21]:
diamonds
Out[21]:
carat cut color clarity depth table price x y z
0 0.23 Ideal E SI2 61.5 55.0 326 3.95 3.98 2.43
1 0.21 Premium E SI1 59.8 61.0 326 3.89 3.84 2.31
2 0.23 Good E VS1 56.9 65.0 327 4.05 4.07 2.31
3 0.29 Premium I VS2 62.4 58.0 334 4.20 4.23 2.63
4 0.31 Good J SI2 63.3 58.0 335 4.34 4.35 2.75
... ... ... ... ... ... ... ... ... ... ...
53935 0.72 Ideal D SI1 60.8 57.0 2757 5.75 5.76 3.50
53936 0.72 Good D SI1 63.1 55.0 2757 5.69 5.75 3.61
53937 0.70 Very Good D SI1 62.8 60.0 2757 5.66 5.68 3.56
53938 0.86 Premium H SI2 61.0 58.0 2757 6.15 6.12 3.74
53939 0.75 Ideal D SI2 62.2 55.0 2757 5.83 5.87 3.64

53940 rows × 10 columns

Visualizing the price of the diamonds respect to the clarity of the diamonds

In [22]:
sns.boxplot(data = diamonds,x = "clarity",y = "price")
Out[22]:
<AxesSubplot: xlabel='clarity', ylabel='price'>

Boxen Plot¶

Visualizing the price of diamonds with respect to color of the diamonds

In [23]:
sns.catplot(data = diamonds,x = "color",y = "price",kind = "boxen")
Out[23]:
<seaborn.axisgrid.FacetGrid at 0x1d8610296a0>

Titanic¶

Visualizing the data with countplot

In [24]:
titanic = sns.load_dataset("titanic")
fig, ax = plt.subplots(3,3,figsize = (16,16))
sns.countplot(data = titanic,x = 'survived',ax = ax[0][0])
sns.countplot(data = titanic,x = 'pclass',ax = ax[0][1])
sns.countplot(data = titanic,x = 'sex',ax = ax[0][2])
sns.countplot(data = titanic,x = 'sibsp',ax = ax[1][0])
sns.countplot(data = titanic,x = 'parch',ax = ax[1][1])## first row first column
sns.countplot(data = titanic,x = 'embarked',ax = ax[1][2])
sns.countplot(data = titanic,x = 'alone',ax = ax[2][0])

sns.distplot(titanic['fare'],kde = True,ax = ax[2][1])

plt.tight_layout()
C:\Users\Admin\anaconda3\lib\site-packages\seaborn\distributions.py:2557: FutureWarning: `distplot` is a deprecated function and will be removed in a future version. Please adapt your code to use either `displot` (a figure-level function with similar flexibility) or `histplot` (an axes-level function for histograms).
  warnings.warn(msg, FutureWarning)

Visualizing the number of people survived with respect to gender and class as categorical features

In [25]:
sns.catplot(data = titanic, x = "sex", y = "survived", hue = "class", kind = "bar")
Out[25]:
<seaborn.axisgrid.FacetGrid at 0x1d861c353a0>

Plotly¶

In [26]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from numpy.random import randint
from numpy.random import randn
import seaborn as sns
from plotly.offline import iplot
import plotly as py
import cufflinks as cf

Visualizing the relation between age and fare

In [27]:
import plotly.express as px
fig = px.scatter(titanic, x="age", y="fare", color="survived")
fig.show()

Histogram¶

Visualizing the relation between age and fare with respect to "survived"

In [28]:
import plotly.express as px
fig = px.histogram(titanic, x="age", y="fare", color="survived",
                   marginal="box", # or violin, rug
                   hover_data=titanic.columns)
fig.show()

Histrogram to see the distribution of age feature

In [29]:
import plotly.express as px
fig = px.histogram(titanic, x="age")
fig.show()
In [30]:
import plotly.express as px
fig = px.histogram(np.random.rand(100,2))
fig.show()

Visualizing total bill with respect to day

In [31]:
import plotly.express as px
fig = px.box(tips, x="day", y="total_bill",color="day")
fig.show()

Visualizing five summary stats for total bill with respect to day and smoker as a categorical feature

In [32]:
import plotly.express as px
fig = px.box(tips, x="day", y="total_bill",color="smoker")
fig.show()
In [33]:
import plotly.express as px
df = px.data.gapminder().query("year == 2007").query("continent == 'Europe'")
df.loc[df['pop'] < 2.e6, 'country'] = 'Other countries' # Represent only large countries
fig = px.pie(df, values='pop', names='country', title='Population of European continent')
fig.show()
In [34]:
diamonds
Out[34]:
carat cut color clarity depth table price x y z
0 0.23 Ideal E SI2 61.5 55.0 326 3.95 3.98 2.43
1 0.21 Premium E SI1 59.8 61.0 326 3.89 3.84 2.31
2 0.23 Good E VS1 56.9 65.0 327 4.05 4.07 2.31
3 0.29 Premium I VS2 62.4 58.0 334 4.20 4.23 2.63
4 0.31 Good J SI2 63.3 58.0 335 4.34 4.35 2.75
... ... ... ... ... ... ... ... ... ... ...
53935 0.72 Ideal D SI1 60.8 57.0 2757 5.75 5.76 3.50
53936 0.72 Good D SI1 63.1 55.0 2757 5.69 5.75 3.61
53937 0.70 Very Good D SI1 62.8 60.0 2757 5.66 5.68 3.56
53938 0.86 Premium H SI2 61.0 58.0 2757 6.15 6.12 3.74
53939 0.75 Ideal D SI2 62.2 55.0 2757 5.83 5.87 3.64

53940 rows × 10 columns

In [35]:
diamonds[diamonds["cut"]=="Ideal"].max()
Out[35]:
carat        3.50
depth       66.70
table       63.00
price    18806.00
x            9.65
y           31.80
z            6.03
dtype: float64

Plotly Pie charts¶

Visualizing Price and cut with pie chart

In [36]:
fig = px.pie(diamonds, values='price', names='cut')
fig.show()

Visualizng price with respect to clarity

In [37]:
fig = px.pie(diamonds, values='price', names='clarity')
fig.show()

Visualizing price and cut using boxplot

In [38]:
import plotly.express as px
fig = px.box(diamonds, x="cut", y="price")
fig.show()